# -*-coding:utf-8-*-
import json
import os
import urllib.request
from pathlib import Path


def node_modules(file_dir):
    # 通过递归遍历 node_modules 每个子包的 package.json 解析下载链接
    links = []
    for root, dirs, files in os.walk(file_dir):
        if 'package.json' in files:
            package_json_file = os.path.join(root, 'package.json')
            try:
                with open(package_json_file, 'r', encoding='UTF-8') as load_f:
                    load_dict = json.load(load_f)
                    if '_resolved' in load_dict.keys():
                        links.append(load_dict['_resolved'])
            except Exception as e:
                print(package_json_file)
                print('Error:', e)
    return links


def package_lock(package_lock_path):
    # 通过递归遍历 package-lock.json 解析下载链接
    links = []
    with open(package_lock_path, 'r', encoding='UTF-8') as load_f:
        load_dict = json.load(load_f)
        search(load_dict, "resolved", links)
    return links


def yarn_lock(package_lock_path):
    # 通过递归遍历 xxx-yarn.lock 解析下载链接
    links = []
    with open(package_lock_path, 'r', encoding='UTF-8') as load_f:
        for line in load_f:
            if line.find('resolved') >= 0:
                line = line.replace('resolved', '')
                url = line.strip().strip('"')
                links.append(url)
    return links


def search(json_object, key, links):
    # 遍历查找指定的key
    for k in json_object:
        if k == key:
            links.append(json_object[k])
        if isinstance(json_object[k], dict):
            search(json_object[k], key, links)
        if isinstance(json_object[k], list):
            for item in json_object[k]:
                if isinstance(item, dict):
                    search(item, key, links)


def download_file(path, store_path, flag):
    # 根据下载链接下载
    if not Path(store_path).exists():
        os.makedirs(store_path, int('0755'))

    links = []
    if path.endswith("package-lock.json"):
        links = package_lock(path)
    elif path.endswith("yarn.lock"):
        links = yarn_lock(path)
    else:
        links = node_modules(path)
    print("link resolved number:" + str(len(links)))

    for url in links:
        try:
            filename = url.split('/')[-1]
            index = filename.find('?')
            # 去掉 ? 参数和 # 哈希
            if index > 0:
                filename = filename[:index]
            index = filename.find('#')
            if index > 0:
                filename = filename[:index]
            filepath = os.path.join(store_path, filename)
            if not Path(filepath).exists():
                print("download:" + url)
                # 以防以后对请求头做限制
                opener = urllib.request.build_opener()
                opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                urllib.request.install_opener(opener)
                if flag:
                    new_path = os.path.join(os.getcwd(), 'tgz')
                    if not Path(new_path).exists():
                        os.makedirs(new_path, int('0755'))
                    filepath = os.path.join(new_path, filename)
                urllib.request.urlretrieve(url, filepath)
            # else:
                # print("file already exists:", filename)
        except Exception as e:
            print('Error Url:' + url)
            print('Error:', e)


if __name__ == '__main__':
    # 通过 xxx 文件解析对应依赖树
    download_link = os.path.join(os.getcwd(), 'package-lock.json')
    # 下载文件存放的路径
    download_path = os.path.join(os.getcwd(), 'node')
    # 下载文件是否存放到一个新的路径里，默认存放到 node 目录下, download_flag 为 True 时 存放到 tgz 目录下
    download_flag = False
    download_file(download_link, download_path, download_flag)
    print("ok")
